# Access Biological Data with {gmRi}
# nmfs_bio <- gmRi::gmri_survdat_prep(survdat_source = "bio")
withr::with_dir(rprojroot::find_root('_targets.R'),
tar_load(survdat_biological))
nmfs_bio <- survdat_biological
In the original proposal it was stated that 17 species would be used, species that are regularly measured and aged. Ordering all species by the number aged we can pull out the following top species:
#### Data Prep ####
# # Manually make list:
# # species to run it on:
# vonbert_species <- sort(c("atlantic cod", "haddock", "summer flounder", "winter flounder", "acadian redfish",
# "silver hake", "red hake", "american plaice", "yellowtail flounder",
# "atlantic herring", "white hake", "butterfish","witch flounder",
# "atlantic mackerel", "windowpane"))
# names(vonbert_species) <- vonbert_species
# Rank species by how many measurements there are
species_abunds <- nmfs_bio %>%
filter(is.na(age) == FALSE) %>%
count(comname) %>% arrange(desc(n)) # ordered by number measured
# Reorder top 17 alphabetically
vonbert_species <- sort(species_abunds$comname[1:17])
These will be the species that we assess length-at-age characteristics for: acadian redfish, american plaice, atlantic cod, atlantic herring, atlantic mackerel, black sea bass, butterfish, haddock, pollock, red hake, scup, silver hake, summer flounder, white hake, winter flounder, witch flounder and yellowtail flounder
We also only care about data from the two main survey seasons, so we limit the data to just those seasons before splitting it out into lists for each species. The lists contain all data for a given species, broken into groups at a set interval of time. In this case it is five year increments.
# Name the list so it carries through
names(vonbert_species) <- vonbert_species
# Does not work for because of bad start points or no age data:
# skates, dogfish, fourspot flounder, goosefish
# Seasons to use data from
seasons <- c("Summer", "Fall")
##### Pull species and make group id for later
species_data <- map(vonbert_species, function(species_name){
#### Make 5 year increments
yr5_breaks <- seq(1970, 2020, by = 5)
yr5_ends <- seq(1974, 2024, by = 5)
yr5_labels <- str_c(yr5_breaks, "-", yr5_ends)
yr5_labels <- yr5_labels[1:(length(yr5_labels)-1)]
# Drop NA ages, set increment labels
nmfs_bio %>%
filter(comname == species_name,
season %in% seasons,
is.na(age) == FALSE) %>%
mutate(five_yr_group = cut(est_year,
breaks = yr5_breaks,
labels = yr5_labels,
include.lowest = TRUE,
ordered_result = TRUE),
decade = floor_decade(est_year),
group_id = str_c(est_year, comname, sep = "-")) %>%
as.data.frame()
})
The Von-Bertalanffy parameterization used is the “Typical” von Bertalanffy parameterization, also known as the Beverton-Holt parameterization, which is implemented using the {FSA} package.
We can check that this is the case by displaying the equations:
typical <- growthFunShow(param = "Typical", "vonBertalanffy")
ggplot() +
annotate(x = 1, y = 1, label = typical, geom = "text") +
labs(title = '"Typical" Parameterization') +
theme_void()
bholt <- growthFunShow(param = "BevertonHolt", "vonBertalanffy")
ggplot() +
annotate(x = 1, y = 1, label = bholt, geom = "text") +
labs(title = '"Beverton-Holt" Parameterization') +
theme_void()
To start things off we first set the von Bert parameterization to solve using nonlinear least squares: nls()
( vb <- vbFuns(param="Typical") )
function (t, Linf, K = NULL, t0 = NULL)
{
if (length(Linf) == 3) {
K <- Linf[[2]]
t0 <- Linf[[3]]
Linf <- Linf[[1]]
}
Linf * (1 - exp(-K * (t - t0)))
}
<bytecode: 0x7fb584b80838>
<environment: 0x7fb584bc3e28>
Then once we have set that as the function we want to use, we can write a wrapper around it that will take input length and age data, some reasonable parameter starting points, and some minimum number of observations that we set to make sure groups with very few aged fish don’t break the workflow.
# Function to Pull Vonbert Coefficients
vonbert_coef <- function(length_age_dat, start_points, min_obs = 20){
# Von Bert Fitting Using: {FSA}
# Don't run on fewer than a minimum number of obervations
num_aged <- nrow(length_age_dat)
if(num_aged < min_obs){
na_df <- data.frame("Linf" = NA, "K" = NA, "t0" = NA, "n_aged" = num_aged,
"len_age_1" = NA, "len_age_2" = NA, "len_age_3" = NA)
return(na_df)
}
# Use nls to estimate VBGF parameters using starting points
vbert_fit <- nls(length_cm ~ vb(age, Linf, K, t0),
data = length_age_dat,
start = start_points)
# Access parameters with coef()
vbert_coef <- as.data.frame(t(coef(vbert_fit))) %>%
mutate(n_aged = num_aged,
len_age_1 = Linf * (1 - exp(-1 * K * (1 - t0))),
len_age_2 = Linf * (1 - exp(-1 * K * (2 - t0))),
len_age_3 = Linf * (1 - exp(-1 * K * (3 - t0))))
return(vbert_coef)
}
This next function is a little redundant, but makes it easier to use purrr::map and repeat the estimations for each species:
# Function for running that for a single Species
species_vonbert <- function(comname, split_col, min_obs){
# Get starting points from all data
test_starts <- vbStarts(length_cm ~ age, data = species_data[[comname]])
# Get coefficients for groups
species_data[[comname]] %>%
split(.[split_col]) %>%
map_dfr(vonbert_coef, test_starts, min_obs = min_obs, .id = split_col) %>%
mutate(comname = comname)
}
Once everything is prepped we can then fun the von Bert parameter estimation for all the species we are interested using the desired groups.
# Running everything?
species_coef <- vonbert_species %>%
map_dfr(species_vonbert, split_col = "five_yr_group", min_obs = 25)
Since the table of coefficients is somewhat simple here is a function to select a species, the x_column to use (corresponding to the time grouping), and the name of the coefficient to plot.
# pick species and coefficients
plot_vonbert_coef <- function(comnames, x_col, coef_name){
x_col_sym <- sym(x_col)
coef_sym <- sym(coef_name)
coef_label <- switch (
coef_name,
Linf = "Asymptotic Max Length (cm)",
K = "Body Growth Coefficient (K)",
t0 = "Hypothetical Age of Zero Size (t0)",
n_aged = "Number Aged",
len_age_1 = "Age 1 length (cm)",
len_age_2 = "Age 2 length (cm)",
len_age_3 = "Age 3 length (cm)")
species_coef %>%
filter(comname %in% comnames) %>%
ggplot(aes(y = {{coef_sym}}, x = {{x_col_sym}})) +
geom_line(group = 1, linetype = 3) +
geom_point(size = 0.8) +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
facet_wrap(~comname, ncol = 1, scales = "free") +
labs(x = "Date Period", y = coef_label)
}
Using that function we can now take a look at the coefficients for any given species/coefficient combination.
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "Linf")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "K")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "t0")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "n_aged")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "len_age_1")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "len_age_2")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "len_age_3")
# Plotting age distribution
plot_age_bubbleplot <- function(species){
species_data[[species]] %>%
count(comname, est_year, age) %>%
ggplot(aes(y = est_year, x = age, size = n)) +
geom_point(shape = 21, color = "white", fill = gmri_cols("gmri blue")) +
scale_y_reverse() +
facet_wrap(~comname) +
labs(y = "Year", x = "Age", size = "Number Measured") +
guides(size = guide_legend(title.position = "top", title.hjust = 0.5))
}
# Process Each species
bubble_plots <- map(.x = vonbert_species, .f = plot_age_bubbleplot)
bubble_plots[["atlantic cod"]]
bubble_plots[["haddock"]]
bubble_plots[["acadian redfish"]]
bubble_plots[["winter flounder"]]
bubble_plots[["haddock"]]
# Plotting age distribution as ggridges
plot_age_ridgeplot <- function(species){
species_data[[species]] %>%
mutate(yr = factor(est_year),
yr = fct_rev(yr)) %>%
ggplot(aes(x = age, y = yr)) +
geom_density_ridges(fill = gmri_cols("gmri blue"), color = "white") +
facet_wrap(~comname) +
scale_x_continuous(limits = c(0,NA)) +
labs(x = "Age", y = "Year") +
guides(size = guide_legend(title.position = "top", title.hjust = 0.5))
}
# Process Each species
ridge_plots <- map(vonbert_species, plot_age_ridgeplot)
ridge_plots[["atlantic cod"]]
ridge_plots[["haddock"]]
ridge_plots[["acadian redfish"]]
ridge_plots[["winter flounder"]]
ridge_plots[["haddock"]]